商業分析師學 R 語言的六個理由

教學

  • Python 與網站擷取(文化大學進修推廣部)
  • R 語言與視覺化應用(國立台北商業大學)
  • Python 與機器學習(玉山商業銀行)

工作

  • Kyosei.ai(V)
  • Coupang
  • SAS
  • CTBC

我的學習資源

你在咖啡廳做什麼?

六個理由

免費又好裝

不強調物件導向

內外兼修

溝通、溝通、還是溝通

活躍的社群支援

學起來很容易

可以在這裡試試就好

http://jupyter.org/

體驗到底有多容易安裝

從 Tidyverse 與 Gapminder 開始

Tidyverse

Tidyverse:R 語言學習之旅的新起點

Gapminder

安裝上述兩個套件

# Package names to install, passed to install.packages() in a single call.
pkgs <- c(
  "tidyverse",
  "gapminder"
)
install.packages(pkgs = pkgs)

載入

library(tidyverse)
library(gapminder)

常用的函數

函數 用途
filter() 觀測值選擇
select() 變數選擇
mutate() 新增變數
arrange() 排序
summarise() 聚合
group_by() 分組

草帽海賊團資料

# Restore the saved object(s) from the remote .RData file into the session
# (presumably this defines straw_hat_df, which the calls below rely on).
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))

# First look at the data: dimensions, data viewer, summary, structure.
straw_hat_df %>% dim()
straw_hat_df %>% View()
straw_hat_df %>% summary()
straw_hat_df %>% str()

filter()

  • 選出女性船員
# Keep only the rows where gender equals "Female".
straw_hat_df %>% filter(gender == "Female")
##         name gender    occupation  bounty age birthday height
## 1       Nami Female     Navigator 6.6e+07  20    07-03    170
## 2 Nico Robin Female Archaeologist 1.3e+08  30    02-06    188

篩選兩個或更多條件

  • 選出女性船員並且年紀大於等於 30 歲
# Keep rows that satisfy both conditions; filter() combines comma-separated
# conditions with a logical AND.
straw_hat_df %>% filter(gender == "Female", age >= 30)
##         name gender    occupation  bounty age birthday height
## 1 Nico Robin Female Archaeologist 1.3e+08  30    02-06    188

select()

  • 選擇姓名與性別
# Keep only the name and gender columns.
straw_hat_df %>% select(name, gender)
##                name gender
## 1   Monkey D. Luffy   Male
## 2      Roronoa Zoro   Male
## 3              Nami Female
## 4             Usopp   Male
## 5    Vinsmoke Sanji   Male
## 6 Tony Tony Chopper   Male
## 7        Nico Robin Female
## 8            Franky   Male
## 9             Brook   Male

整合前面的函數

# Pick the female crew members, but return only the name column
straw_hat_df %>%
  filter(gender == "Female") %>%
  select(name)
##         name
## 1       Nami
## 2 Nico Robin

mutate()

  • 新增兩年前的年紀
  • 利用 age 減去 2
# Add a new column age_2yr_ago holding age minus 2.
straw_hat_df %>% mutate(age_2yr_ago = age - 2)
##                name gender    occupation   bounty age birthday height
## 1   Monkey D. Luffy   Male       Captain 5.00e+08  19    05-05    174
## 2      Roronoa Zoro   Male     Swordsman 3.20e+08  21    11-11    181
## 3              Nami Female     Navigator 6.60e+07  20    07-03    170
## 4             Usopp   Male        Sniper 2.00e+08  19    04-01    176
## 5    Vinsmoke Sanji   Male          Cook 1.77e+08  21    03-02    180
## 6 Tony Tony Chopper   Male        Doctor 1.00e+02  17    12-24     90
## 7        Nico Robin Female Archaeologist 1.30e+08  30    02-06    188
## 8            Franky   Male    Shipwright 9.40e+07  36    03-09    240
## 9             Brook   Male      Musician 8.30e+07  90    04-03    277
##   age_2yr_ago
## 1          17
## 2          19
## 3          18
## 4          17
## 5          19
## 6          15
## 7          28
## 8          34
## 9          88

arrange()

  • 依 age 由小到大排序
# Sort the rows by age (ascending is arrange()'s default order).
straw_hat_df %>% arrange(age)
##                name gender    occupation   bounty age birthday height
## 1 Tony Tony Chopper   Male        Doctor 1.00e+02  17    12-24     90
## 2   Monkey D. Luffy   Male       Captain 5.00e+08  19    05-05    174
## 3             Usopp   Male        Sniper 2.00e+08  19    04-01    176
## 4              Nami Female     Navigator 6.60e+07  20    07-03    170
## 5      Roronoa Zoro   Male     Swordsman 3.20e+08  21    11-11    181
## 6    Vinsmoke Sanji   Male          Cook 1.77e+08  21    03-02    180
## 7        Nico Robin Female Archaeologist 1.30e+08  30    02-06    188
## 8            Franky   Male    Shipwright 9.40e+07  36    03-09    240
## 9             Brook   Male      Musician 8.30e+07  90    04-03    277

summarise()

  • 計算平均年齡 mean_age
# Collapse the table to a single row holding the mean of age as mean_age.
straw_hat_df %>% summarise(mean_age = mean(age))
##   mean_age
## 1 30.33333

group_by()

  • 依照性別計算平均年齡 mean_age
# Group the rows by gender, then compute one mean age (mean_age) per group.
straw_hat_df %>%
  group_by(gender) %>%
  summarise(mean_age = mean(age))
## # A tibble: 2 x 2
##   gender mean_age
##   <fctr>    <dbl>
## 1 Female 25.00000
## 2   Male 31.85714

gapminder 資料

# First look at the gapminder data: dimensions, data viewer, summary, structure.
gapminder %>% dim()
gapminder %>% View()
gapminder %>% summary()
gapminder %>% str()

篩選出台灣

# Exercise template: replace each ___ with the column to test and the value
# to match before running.
gapminder %>%
  filter(___ == "___")

2007 年全球總人口數為多少?

# Exercise template: fill in the ___ blanks (the filter condition, then the
# column to sum into ttl_pop) before running.
gapminder %>%
  filter(___ == ___) %>%
  summarise(ttl_pop = sum(___))

承上,各洲總人口數分別為多少?請由小到大排序

# Exercise template: fill in the ___ blanks (the filter condition, the
# grouping column, the column to sum into ttl_pop, and the column to sort by)
# before running.
gapminder %>%
  filter(___ == ___) %>%
  group_by(___) %>%
  summarise(ttl_pop = sum(___)) %>%
  arrange(___)

開始來畫個圖如何?

散佈圖

# Restrict the data to the rows whose year is 2007.
gapminder_2007 <- gapminder %>% filter(year == 2007)

# Scatter plot of lifeExp against gdpPercap, coloured by continent.
scatter <- ggplot(
  data = gapminder_2007,
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent)
) +
  geom_point() +
  theme_minimal()
scatter

線圖

# Keep only the rows for the four selected countries (all years).
north_asia <- gapminder %>% filter(country %in% c("China", "Japan", "Taiwan", "Korea, Rep."))

# Line plot of gdpPercap over year, one coloured line per country.
line_plot <- ggplot(
  data = north_asia,
  mapping = aes(x = year, y = gdpPercap, colour = country)
) +
  geom_line() +
  theme_minimal()
line_plot

直方圖

# Histogram of gdpPercap in 20 bins, filled with half-transparent red.
# Uses gapminder_2007, which must already exist in the session.
histogram <- ggplot(
  data = gapminder_2007,
  mapping = aes(x = gdpPercap)
) +
  geom_histogram(bins = 20, fill = rgb(1, 0, 0, 0.5)) +
  theme_minimal()
histogram

盒鬚圖

# Box plot of gdpPercap per continent; the colour legend is hidden because the
# x axis already labels each continent.
# Uses gapminder_2007, which must already exist in the session.
box_plot <- ggplot(
  data = gapminder_2007,
  mapping = aes(x = continent, y = gdpPercap, colour = continent)
) +
  geom_boxplot() +
  theme_minimal() +
  theme(legend.position = "none")
box_plot

長條圖

# Keep the 2007 rows for the four selected countries, sorted by gdpPercap
# (ascending is arrange()'s default order).
gdpPercap_2007_na <- gapminder %>%
  filter(year == 2007 & country %in% c("China", "Japan", "Taiwan", "Korea, Rep.")) %>%
  arrange(gdpPercap)

# Rebuild country as a factor whose levels follow the sorted row order, so the
# bars are drawn in that order.
gdpPercap_2007_na$country <- factor(gdpPercap_2007_na$country, levels = gdpPercap_2007_na$country)

# alpha is a fixed setting rather than a data mapping, so it is passed to the
# geom instead of aes(); inside aes() it would be treated as a mapped variable
# and sent through an alpha scale.
bar_plot <- ggplot(gdpPercap_2007_na, aes(x = country, y = gdpPercap, fill = country)) +
  geom_bar(stat = "identity", alpha = 0.5) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none")
bar_plot